*This is the process for assigning each firm its dominant NAICS code
*It is now run at the EIN level rather than the firmid level

* Root of the project; the data directory is built from it.
global projectdir "~"
global datadir "$projectdir/data"

* Work from the data directory so the relative ./ paths used later in this
* file resolve against it. The path is quoted so that a directory name
* containing spaces is still passed to cd as a single argument.
cd "$datadir"

* For each year 2002-2016: load that year's EIN-level extract, keep for every
* EIN the NAICS code that carries the largest total of the weight variable at
* each successive digit level (2 through 6), and save one file per year.
* Relative paths assume the working directory is the data directory.
foreach year of numlist 2002(1)2016 {

    use "./raw_pulls/w2_ssl_usiris/lbd_w2_ein_mu_`year'", clear

    * Rename to fit into Nathan's code, which expects the variables
    * year, fk_naics07_6, emp and firmid.
    gen year = `year'
    gen fk_naics07_6 = substr(fk_naics12, 1, 6)
    * pay is used as the weight that the borrowed code calls emp
    gen emp = pay
    * keep the original firmid out of the way; the EIN plays the firm role here
    rename firmid firmid_hold
    gen firmid = ein

    * This code is from Nathan Goldschlag
    // find firm naics
    // trim the firms naics codes to those in the dominant 2, 3, 4, 5, and 6 digit codes
    // ties are resolved randomly, down weighting missing naics information
    display "Processing: create firm dominant FK NAICS"
    gen n2 = substr(fk_naics07_6, 1, 2)
    gen n3 = substr(fk_naics07_6, 1, 3)
    gen n4 = substr(fk_naics07_6, 1, 4)
    gen n5 = substr(fk_naics07_6, 1, 5)
    gen n6 = substr(fk_naics07_6, 1, 6)

    // fix the random-number state so the random tie-breaks are the same on
    // every run of a given year; the seed value itself is arbitrary
    set seed `year'
    gen r = runiform()
    // rows without a naics code get the lowest possible tie-break value
    replace r = 0 if missing(fk_naics07_6)

    // loop through naics levels
    foreach i of num 2(1)6 {
        // total weight of each i-digit naics within the firm-year
        bys year firmid n`i': egen n`i'emp = total(emp)
        // largest i-digit naics first; among equal totals the highest random
        // number comes first, so rows with missing naics (r = 0) lose ties.
        // Sorting r ascending here would instead make missing naics win
        // every tie.
        gsort year firmid -n`i'emp -r
        // keep only the rows that belong to the winning i-digit code
        bys year firmid: drop if substr(fk_naics07_6, 1, `i') != n`i'[1]
    }

    // collapse to one row per firm-year with its dominant code
    keep firmid year fk_naics07_6
    rename fk_naics07_6 f_fk_naics07_6
    duplicates drop

    // restore the names that describe what the data actually hold
    rename firmid ein
    rename f_fk_naics07_6 f_fk_naics12_6

    // save to disk to use in firm-level panel
    save "./intermediate_files/BR/ein_naics_`year'.dta", replace
    // owner and group get read, write and execute; others get read only
    shell chmod 774 "./intermediate_files/BR/ein_naics_`year'.dta"
}

